library(Seurat)
## Registered S3 method overwritten by 'spatstat.core':
##   method          from
##   formula.glmmPQL MASS
## Attaching SeuratObject
## Attaching sp
library(ggplot2)
library(Rtsne)
library(rgl)
## Warning in rgl.init(initValue, onlyNULL): RGL: unable to open X11 display
## Warning: 'rgl.init' failed, running with 'rgl.useNULL = TRUE'.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(reshape)
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
## 
##     rename
library(RColorBrewer)
library(kohonen)

Completed table

# load("Neftel 500 6000.RData")
# #Reading output from Differential Expression
# neftel.markers <-  readRDS("NeftelDElist.RDS")
# #Reading the outputfrom seurat cell cycle phase assignment
# neftel.cellcycle <- readRDS("NeftelCellScoringUPDATED.RDS")
exp.matFINAL <- read.table(file = "leblancmatrix2002000(scaled).txt", header = TRUE, as.is = TRUE, row.names = 1)
#View(neftel.markers)


# # Cut down expression table to just DE genes
# exp.matDE <- exp.matFINAL[rownames(exp.matFINAL) %in% leblanc.markers$gene,]
# #Obtain filtered cells and phases
# cellcycle.assignment<-data.frame(sort(neftel.cellcycle$Phase))
# #Cut down expression table by relevant cells only 
# exp.matDEfinal <- exp.matDE[colnames(exp.matDE) %in% rownames(cellcycle.assignment)]
# #Order cells by phase
# colnames(exp.matDEfinal) <- intersect(rownames(cellcycle.assignment),colnames(exp.matDEfinal))
###############################################
#Average expression table and relevant plots

###############################################
load("Leblanc 200 2000.RData")

# Find the average expression of the genes in each phase
leblancavg <- AverageExpression(leblanc,slot = "scale.data") 

# Filter the average expression table down to just the DE genes
leblancavg <- as.data.frame(leblancavg)
length(intersect(rownames(leblancavg),leblanc.markers$gene)) #184
## [1] 184
leblancavg <- leblancavg[rownames(leblancavg) %in% leblanc.markers$gene,]



###PCA CLUSTERING

leblancpca <- prcomp(leblancavg)

pca.scores <- data.frame(leblancpca$x)

leblanc.val <- cbind(leblancavg,pca.scores)
head(leblanc.val)
##         RNA.G2M     RNA.G1       RNA.S         PC1        PC2           PC3
## STMN1 1.0712511 -0.4369753  0.43561460 -0.07955348  0.2566957  1.528159e-07
## HMGN2 1.2685131 -0.4806220  0.41208763 -0.28128889  0.2826509  1.917924e-07
## CLSPN 0.4336551 -0.4484289  0.94143266  0.64749697  0.6226320 -3.819931e-07
## CDCA8 1.4037896 -0.3483567 -0.06105879 -0.49938170 -0.1778889  6.532607e-07
## CDC20 1.6650299 -0.3774175 -0.17320052 -0.78067800 -0.2282792  7.819159e-07
## KIF2C 1.4195372 -0.3682884 -0.01659434 -0.50739970 -0.1273117  6.122882e-07
### SOM CLUSTERING

leblanc.val$gene <- rownames(leblanc.val)
leblanc.val <- leblanc.val %>% select(gene, everything())

som.data <- as.matrix(leblanc.val[,c(2:4)])
set.seed(2023)

som.data <- som(som.data, grid = somgrid(6, 6, "hexagonal"))
summary(som.data)
## SOM of size 6x6 with a hexagonal topology and a bubble neighbourhood function.
## The number of data layers is 1.
## Distance measure(s) used: sumofsquares.
## Training data included: 184 objects.
## Mean distance to the closest unit in the map: 0.007.
plot(som.data, main = "leblanc data",shape="straight")
## Warning in par(opar): argument 1 does not name a graphical parameter

# TACC3 cluster list
somclusterlist <- data.frame(rownames(leblanc.val),som.data$unit.classif)
grep("^23$",somclusterlist$som.data.unit.classif,value =T)
## [1] "23" "23" "23" "23"
TACC3list.leblanc <- somclusterlist[grep("^23$",somclusterlist$som.data.unit.classif),]


## use hierarchical clustering to cluster 
som.data$codes <- as.data.frame(som.data$codes)
som_cluster <- cutree(hclust(dist(som.data$codes)),k = 5)
# plot these results:
plot(som.data, type="mapping", bgcol = som_cluster, main = "Clusters")
add.cluster.boundaries(som.data, som_cluster) 

### 3D scatter plot
#plot3d(leblancavg)
#rglwidget()
#Dotplot of TACC3 cluster
DotPlot(leblanc, features = TACC3list.leblanc$rownames.leblanc.val. ,dot.scale = 8) + RotatedAxis()
## Warning: Scaling data with a low number of groups may produce misleading results

SOM clustering on all genes

leblancavgall <- AverageExpression(leblanc,slot = "scale.data") 
leblancavgall <- as.data.frame(leblancavgall)

#PCA CLUSTERING

leblancallpca <- prcomp(leblancavgall)

allpca.scores <- data.frame(leblancallpca$x)

leblanc.valall <- cbind(leblancavgall,allpca.scores)
head(leblanc.valall)
##                    RNA.G2M       RNA.G1        RNA.S          PC1           PC2
## FO538757.2    -0.007467288  0.010400026 -0.023757525 -0.030333729  0.0178150679
## AP006222.2    -0.078484560  0.071572920 -0.143375748 -0.157527978  0.1009754532
## RP4-669L17.10 -0.022351914 -0.004044557 -0.013357627 -0.034068039 -0.0007278859
## RP11-206L10.9 -0.007157648 -0.001265398  0.007934584 -0.014111910 -0.0118040256
## LINC00115     -0.034101518  0.017724276 -0.026860368 -0.056923574  0.0097439200
## FAM41C         0.020441711  0.003944667 -0.030463548 -0.006612753  0.0351644067
##                         PC3
## FO538757.2     0.0014726278
## AP006222.2     0.0017340089
## RP4-669L17.10 -0.0119649964
## RP11-206L10.9  0.0011124561
## LINC00115      0.0007717643
## FAM41C         0.0001050152
# SOM CLUSTERING

leblanc.valall$gene <- rownames(leblanc.valall)
leblanc.valall <- leblanc.valall %>% select(gene, everything())

som.data <- as.matrix(leblanc.valall[,c(2:4)])
set.seed(2023)
som.data <- som(som.data, grid = somgrid(6, 6, "hexagonal"))
summary(som.data)
## SOM of size 6x6 with a hexagonal topology and a bubble neighbourhood function.
## The number of data layers is 1.
## Distance measure(s) used: sumofsquares.
## Training data included: 15234 objects.
## Mean distance to the closest unit in the map: 0.002.
plot(som.data, main = "leblanc data",shape="straight")
## Warning in par(opar): argument 1 does not name a graphical parameter

somclusterlist <- data.frame(rownames(leblanc.valall),som.data$unit.classif)
grep("^31$",somclusterlist$som.data.unit.classif,value =T)
##  [1] "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31"
## [16] "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31"
## [31] "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31"
## [46] "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31"
TACC3listall.leblanc <- somclusterlist[grep("^31$",somclusterlist$som.data.unit.classif),]

## use hierarchical clustering to cluster the codebook vectors
som.data$codes <- as.data.frame(som.data$codes)
som_cluster <- cutree(hclust(dist(som.data$codes)),k = 5)
# plot these results:
plot(som.data, type="mapping", bgcol = som_cluster, main = "Clusters")
add.cluster.boundaries(som.data, som_cluster) 

length(intersect(TACC3list.leblanc$rownames.leblanc.val.,TACC3listall.leblanc$rownames.leblanc.valall.)) # 2 (half DE genes)
## [1] 2

Complex Heatmap - SOM derived TACC3 list for average expression (leblanc)

# library(ComplexHeatmap)
# library(circlize)
# library(GetoptLong)
# 
# #Loading the final expression matrix for scaled neftel data
# #exp.matFINAL <- read.table(file = "", header = TRUE, as.is = TRUE, row.names = 1)
# 
# # Cut down expression table to just TACC3 related genes from SOM Clustering
# Tacc3leblanc.mtx <- exp.matFINAL[rownames(exp.matFINAL) %in% TACC3list$rownames.leblanc.val.,]
# 
# #Annotating columns with Cell Cycle Phase
# ha=HeatmapAnnotation(Phase=leblanc$Phase, col=list(Phase=c("S"="lightgreen","G2M"="lightblue","G1"="orange")))
# 
# #Constructing a ComplexHetamp
# ht_list = Heatmap(data.matrix(Tacc3leblanc.mtx), col = colorRamp2(c(-1.5, 0, 1.5), c("blue", "white", "red")),border = "white" ,name = "scaled_expr",row_title = "Clustering of TACC3 related genes from SOM",show_column_names = FALSE,show_row_names = TRUE ,top_annotation = ha,row_names_max_width = unit(3, "cm"),width = unit(10, "cm"),na_col = "black",column_km =3,gap=unit(10,"mm"),row_names_gp = gpar(fontsize = 10),heatmap_legend_param = list(title = "Scaled expression",use_raster = TRUE))
# #save.image("/rds/projects/g/gendood-preclinomics/Bioinfo_Module_6_Group_Project/som_ht_neftel.RData")
# 
# #View the heatmap
# ht_list